**************************************************
* GENERATE BUFFER AND CALCULATE CRIME DISTANCES
**************************************************

* In AcrGIS:
	* Use file unique-lat-long-violent-crimes-2011-2020.csv
	* CRS info: EPSG:4269
	* Buffer: 0.016 degrees ≈ 1 mile
	* Generates shapefile: unique-lat-long-officer-addresses-2012-2020-buffer-0016degrees.shp



* 1. Convert 1‑mile buffer shapefile to Stata datasets
shp2dta using "unique-lat-long-officer-addresses-2012-2020-buffer-0016degrees.shp", ///
    genid(_ID) ///
    data("unique-lat-long-officer-addresses-2012-2020-buffer-0016degrees_data.dta") ///
    coor("unique-lat-long-officer-addresses-2012-2020-buffer-0016degrees_coor.dta") ///
    replace

* 2. Load violent crime records (homicides & shootings)
use "violent-crimes-linked-to-arrests-2011-2020.dta", clear
keep if inlist(crimetype, "HOMICIDE", "SHOOTING")

* 3. Point-in-polygon join: assign crimes to residential buffers
gen _Y = crime_lat
gen _X = crime_lon
geoinpoly _Y _X using "unique-lat-long-officer-addresses-2012-2020-buffer-0016degrees_coor.dta"
merge m:1 _ID using "unique-lat-long-officer-addresses-2012-2020-buffer-0016degrees_data.dta"

keep if _merge == 3
drop _merge

* 4. Compute straight-line distances (miles)
order add_unique add_lat add_lon crime_lat crime_lon
sort add_unique crime_lat crime_lon
geodist add_lat add_lon crime_lat crime_lon, generate(dist_miles) miles
sum dist_miles

* 5. Keep and rename key fields
keeporder add_unique add_lat add_lon crime_lat crime_lon dist_miles casenumber date year crimetype vict_sex vict_race arrestdate_first arrestdate_last arrestees perp_race_*
gen off_nhood_buffer_id = add_unique
rename dist_miles off_crime_dist_miles

* 6. Prefix crime fields for clarity
foreach var of varlist casenumber date year crimetype vict_sex vict_race arrestdate_first arrestdate_last arrestees perp_race_* {
    rename `var' crime_`var'
}

* 7. Variable labels
label var off_nhood_buffer_id           "1-mile buffer around unique officer address"
label var off_crime_dist_miles          "Distance (in miles) from unique officer address to crime location"
label var add_lat                       "Latitude of unique officer address"
label var add_lon                       "Longitude of unique officer address"
label var crime_lat                     "Latitude of crime"
label var crime_lon                     "Longitude of crime"
label var crime_casenumber              "Crime CPD case number"
label var crime_date                    "Crime date"
label var crime_year                    "Crime year"
label var crime_crimetype               "Crime type"
label var crime_vict_sex                "Crime victim sex"
label var crime_vict_race               "Crime victim race"
label var crime_arrestdate_first        "Date of first arrest connected to the crime"
label var crime_arrestdate_last         "Date of last arrest connected to the crime"
label var crime_arrestees               "Number of people arrested connected to the crime (as of Feb 23 2024)"
label var crime_perp_race_blk           "Any black arrests connected to the crime (as of Feb 23 2024)"
label var crime_perp_race_hsp           "Any hispanic arrests connected to the crime (as of Feb 23 2024)"
label var crime_perp_race_wht           "Any white arrests connected to the crime (as of Feb 23 2024)"
label var crime_perp_race_oth           "Any other arrests connected to the crime (as of Feb 23 2024)"
label var crime_perp_race_unk           "Any unknown race arrests connected to the crime (as of Feb 23 2024)"

* 8. Convert crime_date to numeric date
gen crime_date2 = substr(crime_date, 1, 10)
gen crime_date_num = date(crime_date2, "MDY")
format crime_date_num %td
drop crime_date2

* 9. Compress and save
compress
save "violent-crimes-linked-to-arrests-2011-2020-happening-within-1-mile-of-unique-officer-2012-2020-addresses.dta", replace





**************************************************
* ASSIGN CRIME DATA BASED ON BUFFER & WORK DAYS
**************************************************

clear all
set maxvar 120000
use "masterfile-in-geo-beats.dta", clear
drop if add_has_address == 0

*--- 1. Prepare join variables ---*
gen crime_date_num       = assgn_date_num
gen off_nhood_buffer_id  = add_unique_lat_lon

*--- 2. Join crime records to officer-days ---*
joinby off_nhood_buffer_id crime_date_num ///
    using "violent-crimes-linked-to-arrests-2011-2020-happening-within-1-mile-of-unique-officer-2012-2020-addresses.dta", ///
    unmatched(master)

*--- 3. Standardize crime type codes ---*
replace crime_crimetype = lower(crime_crimetype)
replace crime_crimetype = substr(crime_crimetype, 1, 3)
tab crime_crimetype

**************************************************
* DISTANCE FLAGGING IN 1/8th INCREMENTS
**************************************************

*--- 4. Bin distances into 1/8‑mile intervals ---*
gen off_crime_dist_q = ""
replace off_crime_dist_q = "q1" if off_crime_dist_miles >= 0       & off_crime_dist_miles <= (1/8)
replace off_crime_dist_q = "q2" if off_crime_dist_miles >  (1/8)   & off_crime_dist_miles <= (2/8)
replace off_crime_dist_q = "q3" if off_crime_dist_miles >  (2/8)   & off_crime_dist_miles <= (3/8)
replace off_crime_dist_q = "q4" if off_crime_dist_miles >  (3/8)   & off_crime_dist_miles <= (4/8)
replace off_crime_dist_q = "q5" if off_crime_dist_miles >  (4/8)   & off_crime_dist_miles <= (5/8)
replace off_crime_dist_q = "q6" if off_crime_dist_miles >  (5/8)   & off_crime_dist_miles <= (6/8)
replace off_crime_dist_q = "q7" if off_crime_dist_miles >  (6/8)   & off_crime_dist_miles <= (7/8)
replace off_crime_dist_q = "q8" if off_crime_dist_miles >  (7/8)   & off_crime_dist_miles != .

**************************************************
* CRIME × DISTANCE INDICATORS
**************************************************

*--- 5. By crime type & distance ---*
foreach d in q1 q2 q3 q4 q5 q6 q7 q8 {
    foreach c in hom sho {
        gen byte cri_`c'_`d' = (crime_crimetype == "`c'" & off_crime_dist_q == "`d'")
    }
}

**************************************************
* CRIME BY VICTIM RACE
**************************************************

*--- 6. Harmonize victim race categories ---*
replace crime_vict_race = "blk" if crime_vict_race == "BLK" 
replace crime_vict_race = "wht" if crime_vict_race == "WHI"
replace crime_vict_race = "hsp" if crime_vict_race == "WWH" | crime_vict_race == "WBH"
replace crime_vict_race = "oth" if inlist(crime_vict_race, "UNKNOWN", "I", "API")

*--- 7. By distance & victim race ---*
foreach d in q1 q2 q3 q4 q5 q6 q7 q8 {
    foreach v in blk hsp wht oth {
        foreach c in hom sho {
            gen byte cri_`c'_`d'_v_`v' = ///
                (crime_crimetype == "`c'" & off_crime_dist_q == "`d'" & crime_vict_race == "`v'")
        }
    }
}

**************************************************
* CRIME BY PERPETRATOR RACE
**************************************************

*--- 8. By distance & perpetrator race ---*
foreach d in q1 q2 q3 q4 q5 q6 q7 q8 {
    foreach p in blk hsp wht oth unk {
        foreach c in hom sho {
            gen byte cri_`c'_`d'_p_`p' = ///
                (crime_crimetype == "`c'" & off_crime_dist_q == "`d'" & crime_perp_race_`p' == 1)
        }
    }
}

**************************************************
* CRIME BY VICTIM & PERPETRATOR RACE
**************************************************

*--- 9. By distance, victim & perpetrator race ---*
foreach d in q1 q2 q3 q4 q5 q6 q7 q8 {
    foreach p in blk hsp wht oth unk {
        foreach v in blk hsp wht oth {
            foreach c in hom sho {
                gen byte cri_`c'_`d'_v_`v'_p_`p' = ///
                    (crime_crimetype == "`c'" & off_crime_dist_q == "`d'" & ///
                     crime_vict_race == "`v'" & crime_perp_race_`p' == 1)
            }
        }
    }
}

**************************************************
* COLLAPSE TO OFFICER‑DAY LEVEL
**************************************************

*--- 10. Sum indicators by officer & day ---*
gcollapse (sum) cri_*, by(uid assgn_date_num) sumcheck
compress

**************************************************
* LEAD/LAG AGGREGATES (±3 WEEKS)
**************************************************

*--- 11. Generate lags & leads for victim‑race measures ---*
sort uid assgn_date_num
foreach var of varlist *_q1 *_q2 *_q3 *_q4 *_q5 *_q6 *_q7 *_q8 *v_blk *v_hsp *v_wht *v_oth *p_blk *p_hsp *p_wht *p_oth *p_unk  {
    forval d = 0/21 {
        by uid: gen byte `var'_lg`d' = `var'[_n-`d']
        by uid: gen byte `var'_ld`d' = `var'[_n+`d']
    }
    * 1–3 weeks before
    egen byte `var'_1wb = rowtotal(`var'_lg0–`var'_lg7)
    egen byte `var'_2wb = rowtotal(`var'_lg8–`var'_lg14)
    egen byte `var'_3wb = rowtotal(`var'_lg15–`var'_lg21)
    * 1–3 weeks after
    egen byte `var'_1wa = rowtotal(`var'_ld1–`var'_ld7)
    egen byte `var'_2wa = rowtotal(`var'_ld8–`var'_ld14)
    egen byte `var'_3wa = rowtotal(`var'_ld15–`var'_ld21)
    drop `var'_lg* `var'_ld* `var'
}


keeporder uid assgn_date_num cri_*wb cri_*wa

**************************************************
* MERGE BACK TO MASTERFILE & CREATE CONTROLS
**************************************************

*--- 12. Merge aggregated crime measures ---*
merge 1:1 uid assgn_date_num using "masterfile-in-geo-beats.dta"
drop _merge

*--- 13. Day-of-week and BSDM and YBSDM grouping variables ---*
gen assgn_dow = dow(assgn_date_num)
egen assgn_ybsdm = group(assgn_year assgn_month assgn_dow assgn_shift assgn_beat)
egen assgn_bsdm  = group(assgn_month assgn_dow assgn_shift assgn_beat)

*--- 14. Officer control variables ---*
* Rank indicators
gen rankfirst   = substr(off_rank, 1, 1)
tab rankfirst
gen off_rank_po = (rankfirst == "P")
gen off_rank_sg = (rankfirst == "S")
gen off_rank_lt = (rankfirst == "L")
drop rankfirst

* Months of service
gen off_months_service    = (assgn_date_num - off_apt_date) / 30
replace off_months_service = 0 if off_months_service < 0
gen off_months_service_sq = off_months_service^2

* Gender indicator
gen off_male = (off_gender == "male")

* Lives in same district/beat
gen off_lives_district = (add_pol_district == assgn_unit)
gen off_lives_beat     = (assgn_beat_numerical_part == add_pol_beat)
sum off_lives_*

**************************************************
* SCALE & FINALIZE OUTCOME VARIABLES
**************************************************

*--- 15. Scale use‑of‑force, arrests, stops per 100 shifts ---*
foreach var of varlist uof_* arr_* stp_* {
    replace `var' = `var' * 100
}

*--- 16. Rename male & female (MAF) indicators to "all" ---*
ren *_maf_all *

*--- 17. Binarize crime indicators ---*
foreach var of varlist cri_* {
    replace `var' = 1 if `var' > 0
}

**************************************************
* VARIABLE LABELS
**************************************************

lab var assgn_beat_numerical_part "Assigned beat (numerical part of the original beat code)"
lab var assgn_beat_geographic    "Assigned beat is geographic (matches CPD beat shapefile)"

lab var uof_tot           "UOF all types"
lab var uof_blk_mal_yin   "UOF against black male with injury"
lab var uof_blk_mal_nin   "UOF against black male without injury"
lab var uof_blk_fem_yin   "UOF against black female with injury"
lab var uof_blk_fem_nin   "UOF against black female without injury"
lab var uof_blk_mal_all   "UOF against black male with or without injury"
lab var uof_blk_fem_all   "UOF against black female with or without injury"
lab var uof_blk_maf_yin   "UOF against black male or female with injury"
lab var uof_blk_maf_nin   "UOF against black male or female without injury"
lab var uof_blk           "UOF against black male or female with or without injury"

**************************************************
* REORDER & SAVE
**************************************************

keeporder uid off_* add_* assgn_* uof_* arr_* stp_* cri_*wb cri_*wa
compress

save "masterfile-in-geo-beats-with-crime-for-analyses.dta", replace

